The code chunk below installs (if they are missing) and loads the R packages required for this analysis.
# Packages attached with library() because their functions are called
# unqualified throughout the script. Each name is listed once.
# NOTE(review): d3treeR may not be installable from CRAN with
# install.packages() - confirm its install source.
packages <- c(
  "tidyverse", "treemap", "ggrepel", "lubridate", "gapminder", "gganimate",
  "ggiraph", "plotly", "zoo", "tmap", "sf", "trelliscopejs", "hrbrthemes",
  "transformr", "d3treeR", "clock", "sftime", "rmarkdown", "data.table"
)

# Packages that are only called as pkg::fun() further down
# (ggdist::stat_halfeye, DT::datatable, crosstalk::bscols). They have to be
# installed, but are deliberately not attached so the search path is unchanged.
namespace_only <- c("ggdist", "DT", "crosstalk")

# Install whatever is missing. requireNamespace() tests availability without
# attaching the package as a side effect.
for (p in c(packages, namespace_only)) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
}

# Attach in the listed order; library() stops with an error if a package is
# still unavailable instead of silently returning FALSE.
for (p in packages) {
  library(p, character.only = TRUE)
}
The code chunk below imports Employers.csv,
Buildings.csv, Jobs.csv, CheckinJournal.csv
and Participants.csv from the data folder into R by using
read_csv() and saves each of them as a tibble data frame.
# Load the five raw tables from the data folder; read_csv() returns a tibble
# for each file.
Employers <- read_csv(file = "data/Employers.csv")
Buildings <- read_csv(file = "data/Buildings.csv")
Jobs <- read_csv(file = "data/Jobs.csv")
Checkin <- read_csv(file = "data/CheckinJournal.csv")
Participants <- read_csv(file = "data/Participants.csv")
# Expose the journal's venueId column under the name employerId, the key that
# the later merges with the employer table use.
Checkin <- rename(Checkin, "employerId" = "venueId")

# Keep the calendar date of every check-in timestamp in its own column.
Checkin[["Date"]] <- as.Date(Checkin[["timestamp"]])

# Retain only the rows whose venueType matches "Workplace".
Workplace_Checkin <- Checkin[grepl("Workplace", Checkin[["venueType"]]), ]
# Assign a running week number to every workplace check-in.
# Week 1 covers 2022-03-01 to 2022-03-07, week 2 the following seven days,
# and so on.
# Whole-day Date arithmetic is used on purpose: passing a Date together with a
# strptime() result to difftime() compares UTC midnight with local-time
# midnight, so after ceiling() the week boundaries would depend on the time
# zone of the machine running the script.
Workplace_Checkin <- Workplace_Checkin %>%
  mutate(
    Week_Num = as.double(
      as.integer(Date - as.Date("2022-03-01")) %/% 7L + 1
    )
  )
# Number of distinct participants that checked in at each employer in each
# week. `.groups = "drop"` returns an ungrouped tibble directly.
Count_Checkin <- Workplace_Checkin %>%
  group_by(Week_Num, employerId) %>%
  summarise(
    Num_of_Employees = n_distinct(participantId),
    .groups = "drop"
  )

# Percentage change in head count per employer, relative to that employer's
# previous row. Only (week, employer) pairs with at least one check-in exist as
# rows, so "previous" means the previous week that has data for the employer.
# lag() is ordered explicitly by Week_Num rather than relying on row order.
# The first row of every employer has no predecessor; only that missing
# Perc_Chg is set to 0, other columns are left untouched.
Count_Checkin <- Count_Checkin %>%
  group_by(employerId) %>%
  mutate(
    Perc_Chg = round(
      (Num_of_Employees - lag(Num_of_Employees, order_by = Week_Num)) /
        lag(Num_of_Employees, order_by = Week_Num) * 100,
      0
    ),
    Perc_Chg = coalesce(Perc_Chg, 0)
  ) %>%
  ungroup()
# Attach the columns of Employers to the weekly counts (inner join on
# employerId; merge() returns a plain data frame).
Count_Checkin <- merge(Count_Checkin, Employers, by = "employerId")

# Round-trip through a CSV file so that read_sf() can build the geometry from
# the "location" column (presumably WKT text - confirm against the data).
# write_csv() does not create missing folders, so make sure the target exists.
dir.create("data/csv", recursive = TRUE, showWarnings = FALSE)
write_csv(Count_Checkin, "data/csv/Count_Checkin.csv")
Count_Checkin_sf <- read_sf(
  "data/csv/Count_Checkin.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)

# Convert the two measure columns back to numeric after the CSV round trip.
Count_Checkin_sf$Num_of_Employees <- as.numeric(Count_Checkin_sf$Num_of_Employees)
Count_Checkin_sf$Week_Num <- as.numeric(Count_Checkin_sf$Week_Num)
# Count the distinct employers each participant checked in with per week and
# keep only the participant-weeks that involve more than one employer.
Change_Job <- Workplace_Checkin %>%
  group_by(participantId, Week_Num) %>%
  summarise("Num_of_Employers" = n_distinct(employerId)) %>%
  ungroup() %>%
  filter(Num_of_Employers > 1)

# Add the participant attributes to those rows.
Change_Job <- merge(x = Change_Job, y = Participants, by = "participantId")
# Spatial (sf) versions of the building and employer tables; the geometry is
# looked up in the "location" column of each CSV file.
buildings <- read_sf(
  "data/Buildings.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
employers <- read_sf(
  "data/Employers.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)

# Add the building attributes to every employer row, matched on buildingId.
Employers <- left_join(Employers, Buildings, by = "buildingId")
Next, we perform a full outer join of Employers and Jobs
on employerId to get a complete overview of all the jobs that
are offered by each employer.
# Full outer join: all = TRUE keeps every employer row and every job row,
# matched on employerId.
Employers <- merge(
  x = Employers,
  y = Jobs,
  by = "employerId",
  all = TRUE
)
# Wrap the job-change table in a shared key so that the chart and the table
# below operate on the same selection.
d <- highlight_key(Change_Job)

# Bar chart of participant-weeks by number of employers, one panel per
# education level.
p1 <- ggplot(d, aes(x = as.factor(Num_of_Employers), fill = educationLevel)) +
  geom_bar() +
  facet_wrap(~educationLevel) +
  ggtitle('Participants with >1 Employers') +
  xlab("Numbers of Employers") +
  ylab("No. of\nParticipants") +
  theme(
    axis.title.y = element_text(angle = 0),
    axis.ticks.x = element_blank(),
    axis.line = element_line(color = 'grey')
  )

# Convert to plotly and enable highlighting on selection.
gg <- highlight(ggplotly(p1), "plotly_selected")

# Stack the chart and the table, each at full width (12 of 12 columns).
# `class` is an argument of DT::datatable(), not of bscols(): handed to
# bscols() it ends up in `...` and is laid out as an extra column holding the
# text "display".
crosstalk::bscols(
  gg,
  DT::datatable(d, rownames = FALSE, class = 'display'),
  widths = c(12, 12)
)
# Trelliscope display with one panel per employer: weekly head count drawn as
# red points, y axis fixed to 0-28 on every panel.
r <- ggplot(
  data = Count_Checkin,
  mapping = aes(x = as.factor(Week_Num), y = Num_of_Employees)
) +
  geom_point(color = "red") +
  labs(
    title = "Weekly Turnover of Each Employers",
    x = "Week",
    y = "Number of\nEmployees"
  ) +
  ylim(0, 28) +
  facet_trelliscope(
    ~employerId,
    nrow = 3,
    ncol = 3,
    width = 800,
    path = "trellisr/",
    self_contained = TRUE
  ) +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(color = "grey"),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(angle = 0),
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.3)
  )

# Evaluating the object at top level renders the display.
r
# Order the education requirement from lowest to highest so the categories
# appear in that order on the axis.
Jobs$educationRequirement <- factor(
  Jobs$educationRequirement,
  levels = c('Low', 'HighSchoolOrCollege', 'Bachelors', 'Graduate')
)

# Raincloud-style plot of hourly rate per education requirement:
# half-eye density + box plot + jittered raw points.
p <- ggplot(
  Jobs,
  aes(x = educationRequirement, y = hourlyRate, fill = educationRequirement)
) +
  ggdist::stat_halfeye(
    adjust = .5,
    width = .6,
    .width = 0,
    justification = -.3,
    point_colour = NA
  ) +
  geom_boxplot(
    width = .25,
    outlier.shape = NA
  ) +
  geom_point(
    # Tooltip text shown by ggplotly(); columns are referenced by name so the
    # values are taken from the layer's own data rather than from Jobs$...
    mapping = aes(text = paste('Job ID: ', jobId,
                               'Employer ID: ', employerId,
                               'Hourly Rate: $', hourlyRate)),
    size = 1.3,
    alpha = .3,
    position = position_jitter(seed = 1, width = .1)
  ) +
  # A plot has a single coordinate system. A coord_cartesian() call placed
  # before coord_flip() is replaced by it (ggplot2 only emits a message), so
  # only coord_flip() is kept; the rendered plot is unchanged.
  coord_flip() +
  ggtitle(label = "Wage Distribution for Different Education Level",
          subtitle = "High Wages For Higher Educated") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5,
                                 color = 'mediumvioletred'),
    axis.title.y = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(color = 'grey')
  )

# Interactive version; only the custom `text` aesthetic is shown on hover.
ggplotly(p, tooltip = 'text')
# Pass the object `tm` to d3tree() with the given root label.
# NOTE(review): `tm` is not created anywhere in the visible part of this file
# (presumably a treemap() result) - confirm it is defined before this call.
d3tree(tm, rootname = "Age and No. of Employees under Each Employer" )
# Animated scatter of head count against percentage change, one animation
# state per week; points are coloured by employerId on a 4-colour rainbow
# gradient.
# NOTE(review): Perc_Chg is mapped as a factor, so the x axis is discrete -
# confirm that this is intended.
p <- ggplot(
  data = Count_Checkin,
  mapping = aes(
    x = as.factor(Perc_Chg),
    y = Num_of_Employees,
    color = employerId
  )
) +
  geom_point(size = 2, alpha = 0.5) +
  scale_colour_gradientn(colours = rainbow(4)) +
  theme_classic() +
  labs(
    title = 'Week:{closest_state}',
    x = '% Change',
    y = 'Number of\nEmployees'
  ) +
  transition_states(Week_Num) +
  ease_aes('linear')

# Render 100 frames at 3 frames per second.
animate(p, nframes = 100, fps = 3)